# 开发作者: HeBugui
# 开发时间 :2022/3/15 22:07
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load the exercise table once at import time into a module-level DataFrame
# that the analysis functions in this file read. The path is relative, so it
# is resolved against the current working directory.
exercise = pd.read_csv("junyi_data/junyi_Exercise_table.csv")


# Convert pandas data (e.g. a value_counts() result) into a list of dicts
def to_dict_list(count_info):
    """Turn a label -> value mapping into a list of record dicts.

    Args:
        count_info: Object exposing ``to_dict()`` that returns a mapping,
            e.g. the Series produced by ``value_counts()``.

    Returns:
        A list with one ``{'value': <value>, 'name': <label>}`` dict per
        entry, in the iteration order of ``count_info.to_dict()``.
    """
    return [
        {'value': count, 'name': label}
        for label, count in count_info.to_dict().items()
    ]


# Basic information
def base_info():
    """Print a structural and statistical overview of the exercise table.

    Writes to stdout, in order: a banner line, the ``DataFrame.info()``
    summary, the first rows (``head()``) and the ``describe()`` statistics.

    Returns:
        None.
    """
    print("*"*100,"基本信息")
    # DataFrame.info() prints its report itself and returns None, so wrapping
    # it in print() would append a stray "None" line to the output.
    exercise.info()
    print(exercise.head())
    print(exercise.describe())

# Count exercises per area
def area_count():
    """Count how many exercises fall into each ``area`` value.

    Prints a banner line to stdout as a side effect.

    Returns:
        A list of ``{'value': <count>, 'name': <area>}`` dicts, one per
        distinct area, in the order produced by ``value_counts()``
        (most frequent first by default).
    """
    print("*"*100,"分类统计area")
    # Delegate to the shared helper, as topic_count() and
    # prerequisites_count() do, instead of repeating the Series -> records
    # conversion inline (which also shadowed the built-in ``list``).
    return to_dict_list(exercise["area"].value_counts())

# Count exercises per topic
def topic_count():
    """Return the counts of the most frequent ``topic`` values.

    Prints a banner line to stdout as a side effect.

    Returns:
        A list of at most ten ``{'value': <count>, 'name': <topic>}``
        dicts, taken from the head of ``value_counts()``.
    """
    print("*"*100,"分类统计topic")
    counts_by_topic = exercise["topic"].value_counts()
    # Keep only the first ten rows of the frequency table.
    top_ten = counts_by_topic.head(10)
    return to_dict_list(top_ten)

# Rows with a missing topic (blank ' ' value); exploratory check, currently commented out
# topic_nan = exercise[exercise["topic"]==' ']
# print(topic_nan)

# Count exercises per live flag
def live_count():
    """Count exercises per value of the ``live`` column.

    Prints a banner line to stdout as a side effect.

    Returns:
        A list of ``{'value': <count>, 'name': 'true' | 'false'}`` dicts,
        one per distinct ``live`` value, in ``value_counts()`` order. A key
        is labelled ``'true'`` only when it compares equal to ``True``;
        every other key is labelled ``'false'``.
    """
    print("*"*100,"分类统计live")
    counts_by_flag = exercise["live"].value_counts().to_dict()
    # The equality test (rather than truthiness) is kept on purpose so that
    # the labelling of non-boolean keys stays exactly as before.
    return [
        {'value': count, 'name': 'true' if flag == True else 'false'}  # noqa: E712
        for flag, count in counts_by_flag.items()
    ]

# Count exercises per prerequisites value
def prerequisites_count():
    """Return the counts of the most frequent ``prerequisites`` values.

    Prints a banner line to stdout as a side effect.

    Returns:
        A list of at most ten ``{'value': <count>, 'name': <prerequisites>}``
        dicts, taken from the head of ``value_counts()``.
    """
    print("*"*100,"分类统计prerequisites")
    counts_by_prerequisite = exercise["prerequisites"].value_counts()
    # Keep only the first ten rows of the frequency table.
    top_ten = counts_by_prerequisite.head(10)
    return to_dict_list(top_ten)

# Draw a box plot of the creation dates
def create_date_box():
    """Parse ``creation_date`` and display its distribution as a box plot.

    Side effects:
        * Overwrites the ``creation_date`` column of the module-level
          ``exercise`` frame with the parsed datetimes.
        * Prints the parsed column to stdout.
        * Draws a seaborn box plot and calls ``plt.show()``.

    Returns:
        None.
    """
    # Parse the whole column in one vectorized call instead of invoking
    # pd.to_datetime once per row through Series.apply.
    exercise['creation_date'] = pd.to_datetime(
        exercise['creation_date'], format='%Y-%m-%d %H:%M:%S.%f UTC'
    )
    print(exercise['creation_date'])
    sns.boxplot(y='creation_date', data=exercise)
    plt.show()

# A Series is like a one-dimensional array (similar to a Python list), but it carries its own index
# Exercise count per topic
def exercise_topic_count():
    """Return the number of exercises per ``topic``.

    Returns:
        The Series produced by ``value_counts(ascending=True)`` on the
        ``topic`` column, i.e. sorted from least to most frequent.
    """
    return exercise["topic"].value_counts(ascending=True)

# Exercise count per area
def exercise_area_count():
    """Return the number of exercises per ``area``.

    Returns:
        The Series produced by ``value_counts(ascending=True)`` on the
        ``area`` column, i.e. sorted from least to most frequent.
    """
    return exercise["area"].value_counts(ascending=True)

# Exercise count per prerequisites value
def exercise_prerequisites_count():
    """Return the number of exercises per ``prerequisites`` value.

    Returns:
        The Series produced by ``value_counts(ascending=True)`` on the
        ``prerequisites`` column, i.e. sorted from least to most frequent.
    """
    return exercise["prerequisites"].value_counts(ascending=True)

# Exercise distribution by area on the knowledge map, as a list of dicts
def exercise_distribution_by_area():
    """Build one scatter series per area from the exercise positions.

    Returns:
        A list of dicts, one per distinct non-null ``area`` in order of
        first appearance in the table. Each dict has the keys ``'name'``
        (the area), ``'color'`` (a hex colour string) and ``'data'`` (a
        list of ``[v_position, h_position]`` pairs for that area).
    """
    # De-duplicate while keeping first-appearance order. Iterating a set
    # gives an arbitrary order (for strings it can differ between
    # interpreter runs), which made the area -> colour assignment unstable.
    areas = list(dict.fromkeys(exercise['area'].dropna()))
    colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF']  # palette
    series_data = []
    for index, area in enumerate(areas):
        points = exercise.loc[exercise['area'] == area, ['v_position', 'h_position']]
        series_data.append({
            'name': area,
            # Wrap around the palette so that having more areas than colours
            # reuses colours instead of raising IndexError.
            'color': colors[index % len(colors)],
            'data': points.to_numpy().tolist(),
        })
    return series_data

# Exercise distribution by topic on the knowledge map, as a list of dicts
def exercise_distribution_by_topic():
    """Build one scatter series per topic from the exercise positions.

    Returns:
        A list of dicts, one per distinct non-null ``topic`` in order of
        first appearance in the table. Each dict has the keys ``'name'``
        (the topic), ``'color'`` (a hex colour string) and ``'data'`` (a
        list of ``[v_position, h_position]`` pairs for that topic).
    """
    # De-duplicate while keeping first-appearance order. Iterating a set
    # gives an arbitrary order (for strings it can differ between
    # interpreter runs), which made the topic -> colour assignment unstable.
    topics = list(dict.fromkeys(exercise['topic'].dropna()))
    colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF','#FECB52',
              '#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692']  # palette
    series_data = []
    for index, topic in enumerate(topics):
        points = exercise.loc[exercise['topic'] == topic, ['v_position', 'h_position']]
        series_data.append({
            'name': topic,
            # There are more topics than colours (41 topics per the original
            # author's note), so the palette is cycled. The modulus uses the
            # palette length rather than a hard-coded 17 so the two cannot
            # drift apart.
            'color': colors[index % len(colors)],
            'data': points.to_numpy().tolist(),
        })
    return series_data

# Script entry point: when run directly, build the per-topic distribution.
# The returned list is discarded here (nothing is printed or saved).
if __name__ == '__main__':
    exercise_distribution_by_topic()